// Trade-Policy Dynamics: Evidence from 60 Years of U.S.-China Trade
// Alessandria,  Khan, Khederlarian, Ruhl, and Steinberg

// inputs:	tar_val.dta
//			SITC2-HS96.dta
//         	SITC2-HS92.dta
// 			hs_mfa_phase_20111208.dta	   	
//			Raw data/Historical
//			SITC2-TSUSA_concordance.dta
//			Raw data/Schott
//			SITC2-HS02.dta
//			SITC2-HS07.dta
//			SITC2-HS12.dta
//			SITC2-HS17.dta

// outputs:	spread_hs6.dta
//			spread_sitc2.dta
//			spread_2dsitc2.dta
//			spread_hs8.dta
//			mfa_sitc2.dta
//			mfa_hs8.dta
//			imports_sitc2_74-18.dta

// packages needed: egenmore


clear all 
set varabbrev off

global dir_raw "../01 Raw data/"
global dir_int "../20 Intermediate files/"
timer on 1
////////////////////////////////////////////////////////////////////////////////
// Concord several (HS, SITC) classifications to tariff spreads. 
{
	// SITC Rev 2, HS 6-digit
	use "${dir_raw}tar_val.dta", clear 
	keep hs8 ntr_rate nonntr_rate spread year
	gen hs6 = substr(hs8,1,6)
	collapse (mean) ntr_rate nonntr_rate spread, by(hs6 year)
	rename nonntr* nntr
	rename ntr* ntr
	rename spread s
	merge m:1 hs6 using "${dir_raw}SITC2-HS96.dta", gen(merge1)
	drop if merge1==2
	rename sitc2 sitc2_orig
	merge m:1 hs6 using "${dir_raw}SITC2-HS92.dta", gen(merge2)
	drop if merge2==2
	replace sitc2_orig = sitc2 if sitc2_orig==""
	drop sitc2 merge*
	rename sitc2_orig sitc2 
	save "${dir_int}spread_hs6.dta", replace
	foreach x in nntr ntr s {
		bysort year sitc2: egen `x'_med = median(`x')
		bysort year sitc2: egen `x'_mean = mean(`x')
	}
	preserve
		drop hs6 nntr ntr s
		duplicates drop
		drop if sitc2==""
		save "${dir_int}spread_sitc2.dta", replace
	restore
	drop *mean *med
	
	// SITC Rev 2, 2-digit
	gen sitc2_2d = substr(sitc2,1,2)
	foreach x in nntr ntr s {
		bysort year sitc2_2d: egen `x'_med = median(`x')
		bysort year sitc2_2d: egen `x'_mean = mean(`x')
	}
	drop hs6 nntr ntr s sitc2
	duplicates drop
	drop if sitc2_2d==""
	save "${dir_int}spread_2dsitc2.dta", replace

	// HS 8-digit
	use "${dir_raw}tar_val.dta", clear
	keep hs8 ntr_rate nonntr_rate spread year
	rename nonntr* nntr
	rename ntr* ntr
	rename spread s
	capture gen hs6 = substr(hs8,1,6)
	merge m:1 hs6 using "${dir_raw}SITC2-HS96.dta", gen(merge1)
	drop if merge1==2
	rename sitc2 sitc2_orig
	merge m:1 hs6 using "${dir_raw}SITC2-HS92.dta", gen(merge2)
	drop if merge2==2
	replace sitc2_orig = sitc2 if sitc2_orig==""
	drop sitc2 merge*
	rename sitc2_orig sitc2 
	save "${dir_int}spread_hs8.dta", replace
}

////////////////////////////////////////////////////////////////////////////////
// Generate MFA
{
	use "${dir_raw}hs_mfa_phase_20111208.dta", clear
	tostring hs, replace
	gen hs6 = substr(hs,1,6)
	merge m:1 hs6 using "${dir_raw}SITC2-HS96.dta", gen(merge1)
	drop if merge1==2
	rename sitc2 sitc2_orig
	merge m:1 hs6 using "${dir_raw}SITC2-HS92.dta", gen(merge2)
	drop if merge2==2
	replace sitc2_orig = sitc2 if sitc2_orig==""
	drop sitc2 merge*
	rename sitc2_orig sitc2 
	drop if sitc2==""

	preserve
		collapse (mean) phase, by(sitc2)
		replace phase = round(phase)
		save "${dir_int}mfa_sitc2.dta", replace
	restore
	
	gen hs8 = substr(hs,1,8)
	collapse (mean) phase, by(hs8)
	replace phase = round(phase)
	save "${dir_int}mfa_hs8.dta", replace
}

////////////////////////////////////////////////////////////////////////////
// Create 5-digit SITC Rev 2 dataset.
{	
	// 1974--1988
	use "${dir_raw}Historical/1974/Imports-1974.dta", clear
	forvalues x = 75/88 {
		append using "${dir_raw}Historical/19`x'/Imports-19`x'.dta"
	}
	drop if con_val_yr<=0
	gen tar_unwgt = cal_dut_yr/con_val_yr
	replace tar_unwgt = 0 if tar_unwgt<0
	merge m:1 tsusa year using "${dir_int}SITC2-TSUSA_concordance.dta", gen(merge1)
	drop if merge1==2
	drop merge*
	drop if sitc2=="" 
	bysort sitc2 year cty_code: egen countg_jst = nvals(tsusa)
	bysort sitc2 year cty_code: egen countgp_jst = nvals(tsusa dist_entry dist_unlad)
	collapse (mean) tar_unwgt (sum) gen_val_yr cal_dut_yr con_val_yr dut_val_yr con_qy1_yr con_cha_yr card_yr ves_val_yr air_val_yr ves_wgt_yr air_wgt_yr ves_cha_yr air_cha_yr (first) count*_*jst, by(sitc2 cty_code year) 
	rename con_val_yr cusvalue
	rename cal_dut_yr duties
	rename dut_val_yr dvalue
	rename con_qy1_yr quantity
	rename con_cha_yr charge
	rename cty_code uscode
	gen cifvalue = cusvalue + charge
	gen temp1 = length(uscode)
	replace uscode = uscode+"0" if temp1==3
	replace uscode = substr(uscode,1,3)+"0" if temp1==4
	replace uscode = "4700" if uscode=="4690"
	drop if substr(uscode,1,1)=="0"
	destring uscode, replace
	drop temp*
	//merge m:1 uscode using "${dir_raw}uscode_3d.dta", gen(merge1)
	merge m:1 uscode using "${dir_raw}uncodes_uscodes_wbcodes.dta", gen(merge1)
	keep if merge1==3
	drop merge1
	collapse (mean) tar_unwgt (sum) gen_val_yr duties cusvalue dvalue quantity charge cifvalue card_yr ves_val_yr air_val_yr ves_wgt_yr air_wgt_yr ves_cha_yr air_cha_yr (median) count*_*jst (first) uscountry, by(sitc2 wbcode year) 
	save "${dir_int}imports_sitc2_74-18.dta", replace
	
	// 1990--1999
	use "${dir_raw}Schott/imp_detl_yearly_89n", clear
	destring commodity, replace
	forvalues t = 0/9 {
		append using "${dir_raw}Schott/imp_detl_yearly_9`t'n", force
	}
	gen tar_unwgt = cal_dut_yr/con_val_yr
	replace tar_unwgt = 0 if tar_unwgt<0
	format commodity %10.0f
	tostring commodity, replace
	replace commodity = "0"+commodity if length(commodity)==9
	gen hs6 = substr(commodity,1,6)
	merge m:1 hs6 using "${dir_raw}SITC2-HS92.dta", gen(merge1)
	drop if merge1==2
	rename sitc2 sitc2_orig
	merge m:1 hs6 using "${dir_raw}SITC2-HS96.dta", gen(merge2)
	drop if merge2==2
	replace sitc2_orig=sitc2 if sitc2_orig==""
	drop merge* sitc2
	rename sitc2_orig sitc2
	drop if sitc2==""
	bysort sitc2 year cty_code: egen countg_jst = nvals(commodity)
	bysort sitc2 year cty_code: egen countgp_jst = nvals(commodity dist_entry dist_unlad)
	collapse (mean) tar_unwgt (sum) gen_val_yr cal_dut_yr con_val_yr dut_val_yr con_qy1_yr con_cha_yr cards_yr ves_val_yr air_val_yr ves_wgt_yr air_wgt_yr ves_cha_yr air_cha_yr (first) count*_*jst, by(sitc2 cty_code year) 
	rename con_val_yr cusvalue
	rename cal_dut_yr duties
	rename dut_val_yr dvalue
	rename con_qy1_yr quantity
	rename con_cha_yr charge
	rename cty_code uscode
	rename cards_yr card_yr
	gen cifvalue = cusvalue + charge
	replace uscode = real(substr(string(uscode),1,3)+"0")
	// merge m:1 uscode using "${dir_raw}uscode_3d.dta", gen(merge1)
	merge m:1 uscode using "${dir_raw}uncodes_uscodes_wbcodes.dta", gen(merge1)
	keep if merge1==3
	collapse (mean) tar_unwgt (sum) gen_val_yr duties cusvalue dvalue quantity charge cifvalue card_yr ves_val_yr air_val_yr ves_wgt_yr air_wgt_yr ves_cha_yr air_cha_yr (median) count*_*jst (first) uscountry , by(sitc2 wbcode year) 
	
	append using "${dir_int}imports_sitc2_74-18.dta"
	save "${dir_int}imports_sitc2_74-18.dta", replace
	
	// 2000--2008
	use "${dir_raw}Schott/imp_detl_yearly_100n", clear
	forvalues t = 1/9 {
		append using "${dir_raw}Schott/imp_detl_yearly_10`t'n", force
	}
	gen tar_unwgt = cal_dut_yr/con_val_yr
	replace tar_unwgt = 0 if tar_unwgt<0
	format commodity %10.0f
	tostring commodity, replace
	replace commodity = "0"+commodity if length(commodity)==9
	gen hs6 = substr(commodity,1,6)
	merge m:1 hs6 using "${dir_raw}SITC2-HS96.dta", gen(merge1)
	drop if merge1==2
	rename sitc2 sitc2_orig
	merge m:1 hs6 using "${dir_raw}SITC2-HS02.dta", gen(merge2)
	drop if merge2==2
	replace sitc2_orig=sitc2 if sitc2_orig==""
	drop sitc2
	merge m:1 hs6 using "${dir_raw}SITC2-HS07.dta", gen(merge3)
	drop if merge3==2
	replace sitc2_orig=sitc2 if sitc2_orig==""
	drop merge* sitc2
	rename sitc2_orig sitc2
	drop if sitc2=="" 
	bysort sitc2 year cty_code: egen countg_jst = nvals(commodity)
	bysort sitc2 year cty_code: egen countgp_jst = nvals(commodity dist_entry dist_unlad)
	collapse (mean) tar_unwgt (sum) gen_val_yr cal_dut_yr con_val_yr dut_val_yr con_qy1_yr con_cha_yr cards_yr ves_val_yr air_val_yr ves_wgt_yr air_wgt_yr ves_cha_yr air_cha_yr (first) count*_*jst, by(sitc2 cty_code year) 
	rename con_val_yr cusvalue
	rename cal_dut_yr duties
	rename dut_val_yr dvalue
	rename con_qy1_yr quantity
	rename con_cha_yr charge
	rename cty_code uscode
	rename cards_yr card_yr
	gen cifvalue = cusvalue + charge
	replace uscode = real(substr(string(uscode),1,3)+"0")
	//merge m:1 uscode using "${dir_raw}uscode_3d.dta", gen(merge1)
	merge m:1 uscode using "${dir_raw}uncodes_uscodes_wbcodes.dta", gen(merge1)
	keep if merge1==3
	collapse (mean) tar_unwgt (sum) gen_val_yr duties cusvalue dvalue quantity charge cifvalue card_yr ves_val_yr air_val_yr ves_wgt_yr air_wgt_yr ves_cha_yr air_cha_yr (median) count*_*jst (first) uscountry , by(sitc2 wbcode year) 
	
	append using "${dir_int}imports_sitc2_74-18.dta"
	save "${dir_int}imports_sitc2_74-18.dta", replace	
}

////////////////////////////////////////////////////////////////////////////////
// Merge spreads and MFA with the SITC Rev 2 5-digit data.
{
// merge spreads
merge m:1 year sitc2 using "${dir_int}spread_sitc2.dta"
drop if _merge==2
drop _merge
foreach x in nntr_med nntr_mean ntr_med ntr_mean s_med s_mean {
	bysort sitc2: egen `x'_01 = mean(`x'/(year==2001))
	bysort sitc2: egen `x'_99 = mean(`x'/(year==1999))
	bysort sitc2: egen `x'_94 = mean(`x'/(year==1994))
}
drop nntr_med nntr_mean ntr_med ntr_mean s_med s_mean

// merge mfa
merge m:1 sitc2 using "${dir_int}\mfa_sitc2.dta"
drop if _merge==2
drop _merge
replace phase = 0 if phase==.

rename uscountry cty
rename wbcode iso3
rename cusvalue v_jst
rename cifvalue vcif_jst
rename duties duties_jst 
rename dvalue dvalue_jst 
rename charge charge_jst 
rename quantity qty_jst 
rename card_yr card_jst 
replace duties_jst = 0 if duties_jst<=0 

label variable v_jst        "Imports (FOB) from exporter j ind s year t"
label variable vcif_jst     "Imports inc. IF costs"
label variable duties_jst   "Tariff revenue"
label variable dvalue_jst   "Dutiable import value"
label variable qty_jst      "Quantities (summed across products)"
label variable charge_jst   "Insurance and Freight Costs"
label variable nntr_med_01  "Non-NTR Tariff rates (median across products)"
label variable nntr_mean_01 "Non-NTR Tariff rates (mean across products)"
label variable ntr_med_01   "NTR Tariff rates 2001 (median across products)"
label variable ntr_mean_01  "NTR Tariff rates 2001 (mean across products)"
label variable s_med_01     "Spreads 2001 (median across products)"
label variable s_mean_01    "Spreads 2001 (mean across products)"
label variable tar_unwgt    "Applied Duties (unweighted across products)"

save "${dir_int}imports_sitc2_74-18.dta", replace
}

timer off 1
timer list
